Sklearn comes with multiple preloaded datasets for data manipulation, regression, or classification. They are loaded with the following commands
Classification datasets:
Regression datasets:
Multivariate regression:
from sklearn.datasets import load_name
name = load_name()
In [1]:
from sklearn.datasets import load_iris
# Load the preloaded Iris dataset; later cells read its data, feature_names,
# target, target_names and DESCR attributes.
iris = load_iris()
To see these options, type `iris.` and then press Tab after importing.
Select any of the following:
iris.data
iris.DESCR
iris.feature_names
iris.target
iris.target_names
In [2]:
import pandas as pd
In [3]:
# Wrap the feature matrix in a DataFrame, labelling each column with the
# corresponding entry of iris.feature_names, then preview the first two rows.
iris_features_df = pd.DataFrame(iris.data, columns=iris.feature_names)
iris_features_df.head(2)
Out[3]:
In [4]:
# Put the integer class labels into a single-column DataFrame named
# "Species", then preview the first two rows.
iris_target_df = pd.DataFrame({"Species": iris.target})
iris_target_df.head(2)
Out[4]:
In [5]:
# Class names for the integer labels stored in iris.target:
# 0 - setosa, 1 - versicolor, 2 - virginica
list(iris.target_names)
Out[5]:
In [6]:
# Print the description text stored on the dataset's DESCR attribute.
print(iris.DESCR)
In [7]:
from sklearn.datasets import load_breast_cancer,load_boston,load_diabetes,load_linnerud,load_digits
In [8]:
# Show the feature names of each dataset that is loaded here.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell (and the import that provides it) needs scikit-learn < 1.2.
datasets = {
    'Iris': load_iris(),
    'Breast Cancer': load_breast_cancer(),
    'Boston': load_boston(),
    'Diabetes': load_diabetes(),
    'Linnerud': load_linnerud(),
}
for dataset, bunch in datasets.items():
    print("\n** {} **".format(dataset))
    print('{}'.format(bunch.feature_names))
In [9]:
# Show the target names of each dataset that is loaded here.
datasets = {
    'Iris': load_iris(),
    'Breast Cancer': load_breast_cancer(),
    'Digits': load_digits(),
    'Linnerud': load_linnerud(),
}
for dataset, bunch in datasets.items():
    print("\n** {} **".format(dataset))
    print('{}'.format(bunch.target_names))
In [10]:
# Show the shape of the data array of each dataset that is loaded here.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell (and the import that provides it) needs scikit-learn < 1.2.
datasets = {
    'Iris': load_iris(),
    'Breast Cancer': load_breast_cancer(),
    'Boston': load_boston(),
    'Digits': load_digits(),
    'Diabetes': load_diabetes(),
    'Linnerud': load_linnerud(),
}
for dataset, bunch in datasets.items():
    print("\n** {} **".format(dataset))
    print('{}'.format(bunch.data.shape))